# Dependencies ----
library(ReinforcementLearningwithR)
library(compiler)  # BUGFIX: was require(); library() errors loudly if missing
library(tictoc)

# Run configuration ----
strat <- "TikTak1"                   # name of the opponent strategy to train against
antistrat <- get.antistrat()[strat]  # counter-strategy looked up by name
                                     # (presumably a named vector — confirm in package)
set.seed(123456)                     # reproducibility of the training run
file.name <- paste0("opt.run.", strat, ".", Sys.Date(), ".RNN")
continue <- FALSE                    # TRUE resumes from the <file.name>.tmp checkpoint
a.MC <- c(0.1)                       # Monte-Carlo learning rate
a.Q <- c(0.1)                        # Q-learning rate
hybrid.decay <- c(0.98)              # decay of the hybrid MC/Q weighting
block.no <- 500   # Number of Blocks to Play
eval.no <- 1000   # Evaluation at the end
rounds.no <- 60   # Number of periods of game
#' Train an RNN-based strategy against a fixed opponent in a repeated PD game.
#'
#' Builds the game object and learning parameters, runs (or resumes) hybrid
#' Q-learning / Monte-Carlo training with a "personal exploration path" policy,
#' saves the resulting model and memory, and evaluates the learned strategy in
#' two tournaments (with and without a round limit).
#'
#' @param strat Name (string) of the opponent strategy function.
#' @param antistrat Name of the counter-strategy, also entered in the tournament.
#' @param a.MC Monte-Carlo learning rate.
#' @param a.Q Q-learning rate.
#' @param hybrid.decay Decay factor for the hybrid MC/Q weighting.
#' @param block.no Number of training blocks to play.
#' @param eval.no Number of tournament repetitions for the final evaluation.
#' @param rounds.no Number of periods per game.
#' @param continue If TRUE, resume from the checkpoint file <file.name>.tmp;
#'   the checkpoint is assumed to contain `evaluator` and `algo.var`
#'   (TODO confirm against Train.QLearningPersExpPath's out.file format).
#' @param file.name Base name used for the checkpoint file.
#' @return Invisibly, a list with the limited/unlimited tournament payoffs
#'   (`r.limit`, `r.no.limit`) and both tournament objects.
generate.best.strat <- function(strat, antistrat, a.MC, a.Q, hybrid.decay, block.no, eval.no, rounds.no, continue=FALSE, file.name){
  restore.point("generate.best.strat")
  my.strat <- strat

  # Game setup ----
  encoding.state <- c("TimeSeries.minimal")
  eval.strat <- Model.strat.RNN.TimeSeries.minimal
  game.object <- Get.Game.Object.PD(encoding.state = encoding.state, eval.strategy = eval.strat, encoding.params=NULL)
  game.object$game.pars$T <- rounds.no
  game.object$game.pars$T.max <- rounds.no
  game.object$game.pars$delta <- 0.95
  game.object$game.pars$other.strategies <- c(get(my.strat))
  names(game.object$game.pars$other.strategies) <- my.strat
  game.object$game.pars$err.D.prob <- 0.15   # probability of an unintended defection
  game.object$game.pars$err.C.prob <- 0.0    # probability of an unintended cooperation
  # Exported to the global env — presumably read by strategy functions during
  # tournament play; TODO confirm which consumers rely on this side effect.
  assign("game.object",game.object,envir=.GlobalEnv)

  # Learning-algorithm parameters ----
  algo.par <- Get.Def.Par.QLearningPersExpPath()
  algo.par$action.policy <- "exploration.path" #May be 'exploration.path' or 'epsilon.greedy'
  algo.par$expl.path.multi.start <- 0.85
  algo.par$expl.path.multi.end <- 0.95
  algo.par$expl.path.multi.decay.type <- "linear"
  algo.par$expl.path.multi.best.db <- 100
  algo.par$expl.path.multi.best.disc <- 0.98
  # PID-controller gains for the exploration-path targets (variance, shock,
  # surprise, familiarity channels).
  algo.par$expl.path.multi.Kp.var <- 0.005 # proportional gain
  algo.par$expl.path.multi.Ki.var <- 0.00001 # integral time
  algo.par$expl.path.multi.Kd.var <- 0.000005 # derivative time
  algo.par$expl.path.multi.Kp.shock <- 0.005 # proportional gain
  algo.par$expl.path.multi.Ki.shock <- 0.00001 # integral time
  algo.par$expl.path.multi.Kd.shock <- 0.000005 # derivative time
  algo.par$expl.path.multi.Kp.surp <- 0.5 # proportional gain
  algo.par$expl.path.multi.Ki.surp <- 0.2 # integral time
  algo.par$expl.path.multi.Kd.surp <- 0.05 # derivative time
  algo.par$expl.path.multi.Kp.fam <- 0.5 # proportional gain
  algo.par$expl.path.multi.Ki.fam <- 0.02 # integral time
  algo.par$expl.path.multi.Kd.fam <- 0.01 # derivative time
  algo.par$expl.path.multi.Kp.db <- 5 # data base for proportional
  algo.par$expl.path.multi.Ki.db <- 100 # data base for integral
  algo.par$expl.path.multi.Kd.db <- 5 # data base for calculating derivative
  algo.par$expl.path.multi.Kp.disc <- 0.95 # discounting factor for proportional derivative
  algo.par$expl.path.multi.Ki.disc <- 0.99 # discounting factor for determining relevance of points for integral
  algo.par$expl.path.multi.Kd.disc <- 0.95 # discounting factor for determining derivative
  algo.par$expl.path.multi.start.var <- 0.1
  algo.par$expl.path.multi.start.shock <- 0.1
  algo.par$expl.path.multi.start.surp <- 0
  algo.par$expl.path.multi.start.fam <- 0
  algo.par$expl.path.multi.start.frac.var <- 0.5
  algo.par$expl.path.multi.start.frac.shock <- 0.5
  algo.par$expl.path.multi.start.frac.surp <- 0
  algo.par$expl.path.multi.start.frac.fam <- 0
  algo.par$expl.path.multi.end.frac.var <- 0.5
  algo.par$expl.path.multi.end.frac.shock <- 0.5
  algo.par$expl.path.multi.end.frac.surp <- 0
  algo.par$expl.path.multi.end.frac.fam <- 0
  algo.par$gamma <- game.object$game.pars$delta  # keep discount consistent with the game
  algo.par$a <- a.Q
  algo.par$replay.intensive <- 1
  algo.par$curio.beta <- 0
  # Block composition: how many sub-blocks of each kind make up one training block.
  algo.par$block.curr <- 1
  algo.par$block.best <- 1
  algo.par$block.expl.var <- 0
  algo.par$block.expl.shock <- 0
  algo.par$block.expl.surp <- 0
  algo.par$block.expl.fam <- 0
  algo.par$block.expl.vs <- 0
  algo.par$block.expl.multi <- 2
  # One block = (sum of sub-blocks) games of T periods each.
  block.length <- (algo.par$block.curr+algo.par$block.best+algo.par$block.expl.var+algo.par$block.expl.shock+algo.par$block.expl.surp+algo.par$block.expl.fam+algo.par$block.expl.vs+algo.par$block.expl.multi)*game.object$game.pars$T
  algo.par$force.last <- block.length*1
  algo.par$batch.size <- block.length*5
  algo.par$max.mem <- block.length*100
  blocks <- block.no #new: 1000 #main 250
  algo.par$hybrid.Q.a.MC <- a.MC
  algo.par$hybrid.Q.apply <- "always" #also used for MC
  algo.par$hybrid.decay <- hybrid.decay
  algo.par$only.experienced <- TRUE
  algo.par$use.rnn <- TRUE
  algo.par$hybrid.Q <- TRUE
  algo.par$Q.Learning <- FALSE
  algo.par$MC <- FALSE
  algo.par$hybrid.switch <- TRUE

  # Model (RNN) parameters ----
  model.par <- Get.Def.Par.RNN()
  model.par$hidden.nodes <- c(128,64)
  model.par$layer.type <- c("lstm","dense")
  model.par$activation.hidden <- c("sigmoid","sigmoid")
  model.par$dropout <- c(0,0) #Dropouts for the hidden layers. 0 deactivates.
  model.par$recurrent.dropout <- c(0) #Should there be dropout in the recurrent layer?
  model.par$input.dropout <- c(0) #Should there be a input dropout? NULL and 0 deactivates.
  model.par$epochs <- 5
  model.par$single.dimensional <- TRUE
  model.par$batch.size.train <- 600
  model.par$give.up.precision <- 10

  # Resume from checkpoint or initialise from scratch ----
  if(continue){
    load(paste0(file.name,".tmp"))
    # Keras models saved inside an .RData file come back as null external
    # pointers ("<pointer: 0x0>"); reload them from their HDF5 files.
    if(as.character(evaluator$model.best)=="<pointer: 0x0>"){
      if(!is.null(evaluator$model.best.out.file)){
        evaluator$model.best <- load_model_hdf5(filepath=evaluator$model.best.out.file)
      } else {
        stop("best.out.file not defined.")
      }
    }
    if(as.character(evaluator$model.cur)=="<pointer: 0x0>"){
      if(!is.null(evaluator$model.cur.out.file)){
        evaluator$model.cur <- load_model_hdf5(filepath=evaluator$model.cur.out.file)
      } else {
        stop("cur.out.file not defined.")
      }
    }
  } else {
    evaluator <- Setup.QLearningPersExpPath(game.object, algo.par=algo.par, model.par=model.par)
    algo.var <- Initialise.QLearningPersExpPath(game.object, algo.par, memory.init="self.play", memory.param=list(no=100), model.par=model.par)
  }

  # Training ----
  res <- Train.QLearningPersExpPath(evaluator=evaluator, model.par=model.par, algo.par=algo.par, algo.var=algo.var, model.par.surp=NULL, model.par.fam=NULL, game.object = game.object, blocks=blocks, eval.only=FALSE, start.w.training = TRUE,out.file=paste0(file.name,".tmp"))

  # Copy training results back into the local state ----
  evaluator <- res$evaluator
  algo.var$memory <- res$algo.var$memory
  algo.var$analysis <- res$algo.var$analysis
  algo.var$epsilon <- res$algo.var$epsilon
  algo.var$path.goal.var <- res$algo.var$path.goal.var
  algo.var$path.goal.shock <- res$algo.var$path.goal.shock
  algo.var$path.goal.surp <- res$algo.var$path.goal.surp
  algo.var$path.goal.fam <- res$algo.var$path.goal.fam
  algo.var$path.goal.multi <- res$algo.var$path.goal.multi
  algo.var$expl.path.var <- res$algo.var$expl.path.var
  algo.var$expl.path.shock <- res$algo.var$expl.path.shock
  algo.var$expl.path.surp <- res$algo.var$expl.path.surp
  algo.var$expl.path.fam <- res$algo.var$expl.path.fam
  algo.var$expl.path.multi <- res$algo.var$expl.path.multi

  # Save memory & model before the tournaments ----
  idio.name <- paste0("opt.run.RNN.full.",my.strat)
  # BUGFIX: paste0() has no 'sep' argument; the original `sep=" "` was matched
  # into `...` and silently concatenated a trailing blank into the file name.
  file.name <- paste0(idio.name, format(Sys.time(), "%d-%b-%Y %H.%M"),"before.StratTourn")
  save(evaluator, algo.var, algo.par, game.object, model.par, file=file.name)

  # Evaluation tournament without round limit ----
  game = make.pd.game(err.D.prob=0.15, delta=0.95)
  strat = nlist(Model.strat.RNN.TimeSeries.minimal,get(my.strat), get(antistrat))
  tourn.no.limit = init.tournament(game=game, strat=strat, game.seeds=234567)
  set.seed(234567)
  tourn.no.limit = run.tournament(tourn=tourn.no.limit, R = eval.no)
  disable.restore.points(FALSE)
  restore.point("after first tourn")
  # nlist() names unevaluated expressions, so the opponent's column is
  # literally called "get(my.strat)".
  r.no.limit <- get.matches.vs.matrix(tourn.no.limit$dt)["Model.strat.RNN.TimeSeries.minimal","get(my.strat)"]

  # Evaluation tournament with round limit T.max ----
  tourn = init.tournament(game=game, strat=strat)
  set.seed(234567)
  tourn = run.tournament(tourn=tourn, R = eval.no, T.max=rounds.no)
  r.limit <- get.matches.vs.matrix(tourn$dt)["Model.strat.RNN.TimeSeries.minimal","get(my.strat)"]

  # Final save (see BUGFIX above re: paste0 and 'sep') ----
  file.name <- paste0(idio.name, format(Sys.time(), "%d-%b-%Y %H.%M"))
  save(evaluator, algo.var, algo.par, game.object, model.par, r.no.limit, r.limit, tourn, tourn.no.limit, file=file.name)
  show.tournament(tourn)

  # BUGFIX: the original `return(r)` referenced an undefined object 'r' and
  # errored at the very end of the run. Return the evaluation results instead;
  # invisibly, since the top-level caller discards the value.
  invisible(list(r.no.limit = r.no.limit, r.limit = r.limit,
                 tourn.no.limit = tourn.no.limit, tourn = tourn))
}
# Speed up the run: turn off restore points and enable full byte-compilation.
disable.restore.points(TRUE)
enableJIT(3)

# Launch training + evaluation with the configuration defined at the top of
# the file. All arguments are named for clarity; block.no and eval.no were
# previously passed positionally (same binding either way).
generate.best.strat(
  strat = strat,
  antistrat = antistrat,
  a.MC = a.MC,
  a.Q = a.Q,
  hybrid.decay = hybrid.decay,
  block.no = block.no,
  eval.no = eval.no,
  rounds.no = rounds.no,
  continue = continue,
  file.name = file.name
)
# NOTE: the two lines below are scrape residue from the RDocumentation web page
# this script was copied from — not R code. Commented out so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.